library(tidyverse)
library(stargazer)

# NOTE(review): setwd() ties the script to one directory layout. Every relative
# path below (input CSVs, output PDFs and .tex files) resolves against it.
setwd("Code/dataverse/")

qr <- read_csv("4_samples_cleaned.csv")

## Descriptives: mean of each *_correct item per sample ----

# Average only the three items that are plotted. Aggregating the whole data
# frame also calls mean() on non-numeric columns (including `which` itself),
# which returns NA with a warning for every group.
knowledge_items <- c("croat_correct", "malawi_correct", "ted_correct")
agg <- aggregate(qr[knowledge_items],
                 by = list(qr$which),
                 FUN = mean)

# aggregate() returns the groups sorted (fb, high, low, mturk), but the figure
# wants "High", "MTurk", "FB", "Low". Look the rows up by sample code instead
# of by hard-coded position, and fail loudly if a sample is missing, so the
# bars can never be silently mislabelled.
plot_order <- c("high", "mturk", "fb", "low")
agg_rows <- match(plot_order, agg$Group.1)
stopifnot(!anyNA(agg_rows))

cc <- agg$croat_correct[agg_rows]
mc <- agg$malawi_correct[agg_rows]
tc <- agg$ted_correct[agg_rows]


  
# Figure 1: one bar chart per knowledge item, drawn side by side in one PDF.
# Each entry maps a panel title to the per-sample means computed above.
bar_labels <- c("High", "MTurk", "FB", "Low")
fig1_panels <- list(
  "PM of Croatia" = cc,
  "Capital of Malawi" = mc,
  "National Park " = tc
)

pdf(file = "fig1.pdf", width = 7, height = 4)
par(mfrow = c(1, 3))

for (panel_title in names(fig1_panels)) {
  barplot(fig1_panels[[panel_title]],
          names.arg = bar_labels,
          main = panel_title,
          cex.names = 1.8,
          las = 2)
}

dev.off()



# Sum of the three *_correct items per respondent (NA if any item is NA).
qr$total_correct <- with(qr, croat_correct + ted_correct + malawi_correct)




##############low end 




# Figure 2b: density of the low-end scale score (low_end_mean) in each sample.

# Non-missing low_end_mean values per sample. The list names are the legend
# labels; rows are stacked in the order FB, MTurk, High, Low.
low_end_samples <- c(FB = "fb", MTurk = "mturk", High = "high", Low = "low")
low_end_by_sample <- lapply(low_end_samples, function(sample_code) {
  qr$low_end_mean[qr$which == sample_code & !is.na(qr$low_end_mean)]
})

# The value column keeps the name `age` although it holds low-end scores here.
ages <- data.frame(
  age = unlist(low_end_by_sample, use.names = FALSE),
  group = rep(names(low_end_by_sample), times = lengths(low_end_by_sample))
)

pdf("fig2b.pdf", width = 9, height = 6)
# print() is required: a ggplot object only draws when it is printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a plot.
# The fill values are unnamed, so they are assigned to the groups in the order
# of the fill scale's levels.
print(
  ggplot(ages, aes(x = age, fill = group)) +
    geom_density(alpha = 0.5) +
    scale_fill_manual(values = c("blue", "red", "white", "green")) +
    xlab("Low End") +
    ylab("Density of Respondents") +
    ggtitle("Low End Scale of Online Samples") +
    theme(plot.title = element_text(hjust = .5, size = 20),
          legend.text = element_text(size = 20)) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.ticks.y = element_blank(),
          axis.text.x = element_text(size = 15),
          axis.title.x = element_text(size = 15),
          axis.text.y = element_blank(),
          axis.title.y = element_text(size = 15),
          axis.line = element_line(colour = "black"))
)

dev.off()









####



# Per-sample subsets, reused by the figures and regressions that follow.
qr_fb <- qr %>% filter(which == "fb")
qr_mturk <- qr %>% filter(which == "mturk")
qr_high <- qr %>% filter(which == "high")

# Figure 3c: low-end score against age, one loess-smoothed scatter per sample,
# stacked vertically. Each entry maps a panel title to that sample's data.
fig3c_panels <- list(
  "Facebook Sample" = qr_fb,
  "MTurk Sample" = qr_mturk,
  "High DL Sample" = qr_high
)

pdf(file = "fig3c.pdf", width = 9, height = 11)
par(mfrow = c(3, 1))

for (panel_title in names(fig3c_panels)) {
  panel_data <- fig3c_panels[[panel_title]]
  scatter.smooth(panel_data$age,
                 y = panel_data$low_end_mean,
                 span = 2 / 3,
                 degree = 1,
                 xlim = c(18, 90),
                 family = c("symmetric", "gaussian"),
                 xlab = "Age",
                 ylab = "Low End",
                 main = panel_title)
}

dev.off()



# Figure 3d: number of correct answers (total_correct) against age, one
# loess-smoothed scatter per sample, stacked vertically.
fig3d_panels <- list(
  "Facebook Sample" = qr_fb,
  "MTurk Sample" = qr_mturk,
  "High DL Sample" = qr_high
)

pdf(file = "fig3d.pdf", width = 9, height = 11)
par(mfrow = c(3, 1))

for (panel_title in names(fig3d_panels)) {
  panel_data <- fig3d_panels[[panel_title]]
  scatter.smooth(panel_data$age,
                 y = panel_data$total_correct,
                 span = 2 / 3,
                 degree = 1,
                 xlim = c(18, 90),
                 family = c("symmetric", "gaussian"),
                 xlab = "Age",
                 ylab = "Information Retrieval",
                 main = panel_title)
}

dev.off()



############Table 1






# Table 1: OLS regressions of total_correct on the scale scores, adding one
# predictor at a time (harg_mean, then low_end_mean, then power_mean, then age).
# Models 1-4 use the Facebook sample.
reg1l<-(lm( total_correct ~ harg_mean, data = qr_fb))
reg2l<-(lm( total_correct ~ harg_mean +low_end_mean , data = qr_fb))
reg3l<-(lm( total_correct ~ harg_mean +low_end_mean + power_mean, data = qr_fb))
reg4l<-(lm( total_correct ~ harg_mean +low_end_mean + power_mean + age, data = qr_fb))



# Models 5-8 repeat the same four specifications on the MTurk sample.
reg5l<-(lm( total_correct ~ harg_mean, data = qr_mturk))
reg6l<-(lm( total_correct ~ harg_mean +low_end_mean , data = qr_mturk))
reg7l<-(lm( total_correct ~ harg_mean +low_end_mean + power_mean, data = qr_mturk))
reg8l<-(lm( total_correct ~ harg_mean +low_end_mean + power_mean + age, data = qr_mturk))

# Console check of the first model only; it is not written to any file.
summary(reg1l)


# Write all eight models to one table. The formulas are kept as literals in
# each lm() call because the fitted object stores the call as written.
stargazer(reg1l, reg2l, reg3l, reg4l, reg5l, reg6l, reg7l, reg8l, out = "table1.tex")


####Appendix tables
# Table 2: who falls in the lower/upper quartile of the power-user score
# (power_mean) while scoring high/low on total_correct.

# Quartile cut-offs. quantile() is called directly instead of indexing
# summary() by position: it states which statistic is meant and does not depend
# on how summary() lays out or rounds its result. summary()'s default method
# uses the same default quantile type, so the cut-offs are the same values.
power_25th <- quantile(qr$power_mean, probs = 0.25, na.rm = TRUE, names = FALSE)
power_75th <- quantile(qr$power_mean, probs = 0.75, na.rm = TRUE, names = FALSE)

# under = 1: bottom-quartile power_mean but all three answers correct.
# Rows where the condition is NA (missing power_mean or total_correct) are
# skipped by the assignment and therefore stay 0.
qr$under <- 0
qr$under[qr$power_mean < power_25th & qr$total_correct == 3] <- 1

# Demographic covariates. Q81 and Q4 are recoded so that code 1 stays 1 and any
# larger code becomes 0.
# NOTE(review): presumably 1 = white on Q81 and 1 = male on Q4; confirm against
# the codebook.
qr$race <- as.numeric(qr$Q81)
qr$race[qr$race > 1] <- 0
qr$white <- qr$race

qr$male <- as.numeric(qr$Q4)
qr$male[qr$male > 1] <- 0

qr$educ <- as.numeric(qr$Q5)

# Each model is fitted once and the same object is both summarised on the
# console and exported, rather than fitting an identical model twice.
reg1l <- lm(under ~ age + white + male + educ + which, data = qr)
summary(reg1l)

# over = 1: top-quartile power_mean but fewer than two answers correct.
qr$over <- 0
qr$over[qr$power_mean > power_75th & qr$total_correct < 2] <- 1

reg2l <- lm(over ~ age + white + male + educ + which, data = qr)
summary(reg2l)

## Table 2
stargazer(reg1l, reg2l, out = "table2.tex")

# Table 3: same analysis as Table 2, but using the internet-skills score
# (harg_mean) instead of power_mean. Relies on the white, male and educ columns
# created earlier in the script; `under` and `over` are overwritten here.

# Quartile cut-offs. quantile() is called directly instead of indexing
# summary() by position: it states which statistic is meant and does not depend
# on how summary() lays out or rounds its result. summary()'s default method
# uses the same default quantile type, so the cut-offs are the same values.
harg_25th <- quantile(qr$harg_mean, probs = 0.25, na.rm = TRUE, names = FALSE)
harg_75th <- quantile(qr$harg_mean, probs = 0.75, na.rm = TRUE, names = FALSE)

# under = 1: bottom-quartile harg_mean but all three answers correct.
# Rows where the condition is NA are skipped by the assignment and stay 0.
qr$under <- 0
qr$under[qr$harg_mean < harg_25th & qr$total_correct == 3] <- 1

# Each model is fitted once and the same object is both summarised on the
# console and exported, rather than fitting an identical model twice.
reg1l <- lm(under ~ age + white + male + educ + which, data = qr)
summary(reg1l)

# over = 1: top-quartile harg_mean but fewer than two answers correct.
qr$over <- 0
qr$over[qr$harg_mean > harg_75th & qr$total_correct < 2] <- 1

reg2l <- lm(over ~ age + white + male + educ + which, data = qr)
summary(reg2l)

## Table 3
stargazer(reg1l, reg2l, out = "table3.tex")




#######Graphs with Lucid data


# From here on `qr` is replaced by the data set that also has the Lucid sample.
qr <- read_csv("samples_cleaned_w_lucid.csv")

# Per-sample subsets of the reloaded data.
qr_fb <- qr %>% filter(which == "fb")
qr_mturk <- qr %>% filter(which == "mturk")
qr_high <- qr %>% filter(which == "high")
qr_lucid <- qr %>% filter(which == "lucid")

# Figure 3a: internet-skills score (harg_mean) against age, one loess-smoothed
# scatter per sample on a 2 x 2 grid.
fig3a_panels <- list(
  "Facebook Sample" = qr_fb,
  "MTurk Sample" = qr_mturk,
  "High DL Sample" = qr_high,
  "Lucid Sample" = qr_lucid
)

pdf(file = "fig3a.pdf", width = 9, height = 11)
par(mfrow = c(2, 2))

for (panel_title in names(fig3a_panels)) {
  panel_data <- fig3a_panels[[panel_title]]
  scatter.smooth(panel_data$age,
                 y = panel_data$harg_mean,
                 span = 2 / 3,
                 degree = 1,
                 xlim = c(18, 90),
                 family = c("symmetric", "gaussian"),
                 xlab = "Age",
                 ylab = "Internet Skills",
                 main = panel_title)
}

dev.off()



# Figure 3b: power-user score (power_mean) against age, one loess-smoothed
# scatter per sample on a 2 x 2 grid.
fig3b_panels <- list(
  "Facebook Sample" = qr_fb,
  "MTurk Sample" = qr_mturk,
  "High DL Sample" = qr_high,
  "Lucid Sample" = qr_lucid
)

pdf(file = "fig3b.pdf", width = 9, height = 11)
par(mfrow = c(2, 2))

for (panel_title in names(fig3b_panels)) {
  panel_data <- fig3b_panels[[panel_title]]
  scatter.smooth(panel_data$age,
                 y = panel_data$power_mean,
                 span = 2 / 3,
                 degree = 1,
                 xlim = c(18, 90),
                 family = c("symmetric", "gaussian"),
                 xlab = "Age",
                 ylab = "Power User",
                 main = panel_title)
}

dev.off()







###########


# Synthetic reference age vector: every age from 18 to 94 repeated a fixed
# number of times, with the count constant inside each age bin.
# NOTE(review): presumably these are US population counts per single year of
# age, derived from binned census figures; confirm the source and units.
usa <- local({
  # Bins: 18-19, then five-year bins 20-24 ... 80-84, then 85-94.
  bin_start <- c(18, seq(20, 85, by = 5))
  bin_end <- c(19, seq(24, 84, by = 5), 94)

  # Copies of EACH single age inside the corresponding bin.
  copies_per_age <- c(220, 215, 211, 199, 201, 208, 227, 222,
                      196, 168, 124, 92, 73, 57, 54 / 2)

  ages_in_bin <- bin_end - bin_start + 1

  # Expand the per-bin count to one count per single age, then repeat each age.
  rep(as.numeric(18:94), times = rep(copies_per_age, times = ages_in_bin))
})



# Figure 2d: age distribution of each sample next to the `usa` reference vector.

# Non-missing ages per sample. The list names are the legend labels; rows are
# stacked in the order FB, MTurk, High, Lucid, USA.
age_samples <- c(FB = "fb", MTurk = "mturk", High = "high", Lucid = "lucid")
age_by_sample <- c(
  lapply(age_samples, function(sample_code) {
    qr$age[qr$which == sample_code & !is.na(qr$age)]
  }),
  list(USA = usa)
)

ages <- data.frame(
  age = unlist(age_by_sample, use.names = FALSE),
  group = rep(names(age_by_sample), times = lengths(age_by_sample))
)

# Console check of the sample sizes in the loaded data.
table(qr$which)

pdf("fig2d.pdf", width = 9, height = 6)
# print() is required: a ggplot object only draws when it is printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a plot.
# The fill values are unnamed, so they are assigned to the groups in the order
# of the fill scale's levels.
print(
  ggplot(ages, aes(x = age, fill = group)) +
    geom_density(alpha = 0.75) +
    scale_fill_manual(values = c("blue", "red", "white", "yellow", "green")) +
    xlab("Age") +
    ylab("Density of Respondents") +
    ggtitle("Ages of Online Samples") +
    theme(plot.title = element_text(hjust = .5, size = 20),
          legend.text = element_text(size = 20)) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.ticks.y = element_blank(),
          axis.text.x = element_text(size = 15),
          axis.title.x = element_text(size = 15),
          axis.text.y = element_blank(),
          axis.title.y = element_text(size = 15),
          axis.line = element_line(colour = "black"))
)

dev.off()


#################################DL


# Figure 2a: density of the internet-skills score (harg_mean) in each sample.

# Non-missing harg_mean values per sample. The list names are the legend
# labels; rows are stacked in the order FB, MTurk, Lucid, Low. The "high"
# sample is not included in this figure.
skill_samples <- c(FB = "fb", MTurk = "mturk", Lucid = "lucid", Low = "low")
skill_by_sample <- lapply(skill_samples, function(sample_code) {
  qr$harg_mean[qr$which == sample_code & !is.na(qr$harg_mean)]
})

# The value column keeps the name `age` although it holds skill scores here.
ages <- data.frame(
  age = unlist(skill_by_sample, use.names = FALSE),
  group = rep(names(skill_by_sample), times = lengths(skill_by_sample))
)

# Console check of how many respondents each group contributes.
table(ages$group)

pdf("fig2a.pdf", width = 9, height = 6)
# print() is required: a ggplot object only draws when it is printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a plot.
# The fill values are unnamed, so they are assigned to the groups in the order
# of the fill scale's levels.
print(
  ggplot(ages, aes(x = age, fill = group)) +
    geom_density(alpha = 0.5) +
    scale_fill_manual(values = c("blue", "red", "white", "green")) +
    xlab("Internet Skills") +
    ylab("Density of Respondents") +
    ggtitle("Internet Skills of Online Samples") +
    theme(plot.title = element_text(hjust = .5, size = 20),
          legend.text = element_text(size = 20)) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.ticks.y = element_blank(),
          axis.text.x = element_text(size = 15),
          axis.title.x = element_text(size = 15),
          axis.text.y = element_blank(),
          axis.title.y = element_text(size = 15),
          axis.line = element_line(colour = "black"))
)

dev.off()




#################################


# Figure 2c: density of the power-user score (power_mean) in each sample.

# Non-missing power_mean values per sample. The list names are the legend
# labels; rows are stacked in the order FB, MTurk, Lucid, High.
power_samples <- c(FB = "fb", MTurk = "mturk", Lucid = "lucid", High = "high")
power_by_sample <- lapply(power_samples, function(sample_code) {
  qr$power_mean[qr$which == sample_code & !is.na(qr$power_mean)]
})

# The value column keeps the name `age` although it holds power-user scores.
ages <- data.frame(
  age = unlist(power_by_sample, use.names = FALSE),
  group = rep(names(power_by_sample), times = lengths(power_by_sample))
)

pdf("fig2c.pdf", width = 9, height = 6)
# print() is required: a ggplot object only draws when it is printed, and
# top-level auto-printing does not happen when this file is run via source(),
# which would leave the PDF without a plot.
# The fill values are unnamed, so they are assigned to the groups in the order
# of the fill scale's levels.
print(
  ggplot(ages, aes(x = age, fill = group)) +
    geom_density(alpha = 0.5) +
    scale_fill_manual(values = c("blue", "red", "white", "green")) +
    xlab("Power User") +
    ylab("Density of Respondents") +
    ggtitle("Power Usage of Online Samples") +
    theme(plot.title = element_text(hjust = .5, size = 20),
          legend.text = element_text(size = 20)) +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.ticks.y = element_blank(),
          axis.text.x = element_text(size = 15),
          axis.title.x = element_text(size = 15),
          axis.text.y = element_blank(),
          axis.title.y = element_text(size = 15),
          axis.line = element_line(colour = "black"))
)

dev.off()




